ggplot2 的理解

## 加载包, 准备数据
library(tidyverse)
library(dplyr)
library(ggplot2)
library(scales)
library(ggsci)
library(cowplot)
library(RColorBrewer)
library(paletteer)
library(ggrepel)
library(ggridges)
# p值计算相关包
library(ggsignif) # 推荐 (ggplot 语法)
library(ggstatsplot) # 快捷探索性 (自定义度低)

data(mtcars)
data("diamonds") # 导入 ggplots 的内置数据
small_diamonds <- sample_n(diamonds, size = 500) # dplyr 中随机抽取 500 个值
## 画布, xy映射
ggplot(data = small_diamonds, aes(x = carat, y = price)) + # 创建画布
  geom_point(aes(color = cut)) + # 创建点图, 颜色映射到 cut 列
  scale_color_npg() + # 更改映射颜色 (CNS 标度)
  theme_classic() # 更换 x, y轴主题

## Mark 👍
## 更改点的形状大小 (推荐 21)
## 添加黑框
## 图列
ggplot(data = small_diamonds, aes(x = carat, y = price)) + 
  geom_point(shape = 21, # 改变点的形状 (推荐为21 - 带黑框镂空)
             size = 4, # 大小 (内置的形状序号)
             stroke = 0.5, # 边框的粗细
             aes(fill = cut)) + # 添加黑色边框, 填充色映射给 cut
  scale_fill_npg() + # 标度更换为填充色
  theme_classic()

## 点的形状还不满足可以下载扩展的包
## ggstar
## ggimage (图片的形式)
## Mark 👍
## 映射点的大小
## 泡泡图 (点较少比较好看)
## 透明度 alpha
## 去除色条 (连续/离散)
data(mtcars) # 加载数据

ggplot(mtcars, aes(x = wt, y = mpg)) + 
  geom_point(shape = 21, 
             alpha = 0.5, # 设置透明度
             aes(size = disp, fill = factor(cyl))) + # 点大小, 填充色的映射
      # 数字变量默认为连续性, 出现色条, 需要 factor 将连续型变量转换为离散
  scale_fill_npg() + 
  scale_size(range = c(1, 20)) + # 保留格子比较好看# 添加点大小的标度, range (设置点大小的范围)
  theme_bw()

## 添加 title, x,y轴的标题, 图列标题
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
  geom_point(shape = 21, size = 4, 
             color = 'black', aes(fill = cut)) +
  scale_fill_npg() + 
  labs(title = 'point plot', # 设置 title
       x = 'weight of the diamond ', # 设置 x轴的标题
       y = 'price in US dollars', # 设置 y轴的标题
       fill = 'quality of the cut') + # 设置映射图列的标题
  theme_classic()

## 设置x, y轴的刻度
ggplot(data = small_diamonds, aes(x = carat, y = price)) +
  geom_point(shape = 21, size = 4, 
             color = 'black', aes(fill = cut)) +
  scale_fill_npg() + 
  labs(title = 'point plot', # 设置 title
       x = 'weight of the diamond ', # 设置 x轴的标题
       y = 'price in US dollars', # 设置 y轴的标题
       fill = 'quality of the cut') + # 设置映射图列的标题
  scale_x_continuous(breaks = seq(0,3,0.5)) + # x轴的刻度 (0-3, 以0.5为刻度)
  scale_y_continuous(breaks = seq(0, 15000, 5000), # y轴刻度 (0-15000, 5000)
                     labels = c('0', '5K', '10K', '15K')) + # 进一步设置 y轴刻度的标签
  theme_classic()

## 主题布局
# devtools::install_github("calligross/ggthemeassist") # ggplot2 图形化美化插件

## 选中已有图代码 - 上方工具栏 - Addins
p_dv <- 
  ggplot(data = small_diamonds, aes(x = carat, y = price)) +
  geom_point(shape = 21, size = 4, 
             color = 'black', aes(fill = cut)) +
  scale_fill_npg() + 
  labs(title = 'point plot', 
       x = 'weight of the diamond ', 
       y = 'price in US dollars', 
       fill = 'quality of the cut') + 
  scale_x_continuous(breaks = seq(0,3,0.5)) + 
  scale_y_continuous(breaks = seq(0, 15000, 5000), 
                     labels = c('0', '5K', '10K', '15K')) + 
  # ggplot Theme Assistant 生成
  theme_classic() + theme(axis.title = element_text(size = 15,
    face = "bold"), plot.title = element_text(size = 20,
    face = "bold.italic", hjust = 0.5, vjust = 0.75),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15),
    strip.text = element_text(size = 10),
    legend.position = c(0.1, 0.65)) + theme(legend.position = c(0.12, 0.65)) + theme(legend.text = element_text(size = 13),
    legend.title = element_text(size = 13),
    legend.position = c(0.14, 0.75)) + theme(axis.title = element_text(face = "bold.italic"))
p_dv

-————————————————————————————————————————————————————————————————

主题

library(cowplot)
## Mark 👍
## 图形的拼接

### ggplot2 自带主题: theme_(bw), theme_few(), theme_cowplot(), theme_minimal()
p_dv1 <- p_dv + theme_bw()
p_dv1

# p_dv2 <- p_dv + theme_classic()
# p_dv3 <- p_dv + theme_dark()
# p_dv4 <- p_dv + theme_minimal_grid()

### cowp_dvlot 主题: theme_half_op_dven(), theme_minimal_grid(), 
              ### theme_minimal_hgrid(), theme_minimal_vgrid()
# p_dv5 <- p_dv + theme_half_op_dven()
# p_dv6 <- p_dv + theme_minimal_grid()
# p_dv7 <- p_dv + theme_minimal_hgrid()
# p_dv8 <- p_dv + theme_minimal_vgrid()

# LETTERS() # 随机生成字母
# plot_grid(p_dv1, p_dv2, p_dv3, p_dv4, labels = LETTERS
#          , ncol = 3 # 改变行列的布局
#          ) # 将 p_dv1, p_dv2, p_dv3, p_dv4 拼接在一起, 并标注字母序号
# plot_grid(p_dv6, p_dv7, p_dv8, p_dv9, labels = LETTERS)
# plot_grid(p_dv1, p_dv2, p_dv3, p_dv4, labels = LETTERS[1:4])

# 教材p20

-————————————————————————————————————————————————————————————————

调色 (Mark 👍)

离散型变量

自定义 (比色卡网站, 拾色已有的文章)

my_cols <- c('#00ADC8', '#D14749', '#FF912F', '#00B440', '#266199')
library(scales)
show_col(my_cols)

### 查看特定调色板的 十六进制颜色
# brewer_pal(palette = 'Set3')(5)
show_col(brewer_pal(palette = 'Set3')(5))

p_dv + scale_fill_manual(values = my_cols) # 自定义十六进制颜色

RColorBrewer (R内置调色板)

library(RColorBrewer)
par(mar=c(3,4,2,2))
display.brewer.all()

p_dv + scale_fill_brewer(palette = 'Set3') # R 自动整合了 RColorBrewer, 直接调用

ggsci (最常用)

#### (https://github.com/nanxstats/ggsci)
library(ggsci)
p_dv + scale_fill_npg() # 最常用

paletteer (最强大)

####  将包括 ggsci在内的多个调色板整合
####  (https://github.com/EmilHvitfeldt/r-color-palettes)
library(paletteer)

p_dv + scale_fill_paletteer_d('pals::kelly') # d - 离散型, c - 连续型 (pals 下的 kelley)

连续变量

p_cv <- ggplot(data = small_diamonds, aes(x = carat, y = price)) + 
  geom_point(shape = 21, size = 4, 
            color = 'black', aes(fill = depth)) + 
  theme_classic()

单色

p_cv + scale_fill_gradient(low = '#FCDAC9', high = '#7C0D0D')

渐变 (低, 中, 高)

自定义

p_cv + scale_fill_gradient2(low = '#2D4971', 
                            high = '#9F192B', 
                            mid = 'white', # 分别设置 低, 中, 高
                            midpoint = 60) # 中间色的位置

内置调色板

p_cv + scale_fill_gsea() 

paletteer

p_cv + scale_fill_paletteer_c('pals::kovesi.diverging_bwr_40_95_c42') # 自动计算了中间值

-————————————————————————————————————————————————————————————————

PCA

## PCAtools 默认生成的图
library(PCAtools)
gene_exp_PCA <- read.table(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/rnaseq-apple/gene_exp.txt', 
                       sep = '\t', header = T, row.names = 1)


sample_info_PCA <- read.table(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/rnaseq-apple/sample_info.txt', 
                          sep = '\t', header = T, row.names = 1)

pca <- pca(gene_exp_PCA, metadata = sample_info_PCA) # PCA 分析结果

biplot(pca, x = 'PC1', y = 'PC2') # PCA 自带的可视化

ggplot2 优化

数据预处理

#### 
library(tidyverse)

pca_res <- rownames_to_column(pca$rotated, var = 'sample_name') %>%
  left_join(rownames_to_column(sample_info_PCA, var = 'sample_name'), 
            by = 'sample_name')

作图

library(cowplot)

ggplot(pca_res, aes(x = PC1, y = PC2)) + 
  geom_point(size = 8, aes(shape = strain, fill = stage)) +
  scale_shape_manual(values = c(21, 24)) + 
  stat_ellipse(aes(color = stage)) + # 添加环状元素置信区间
  theme_half_open() + 
  # ggplot Theme Assistant
  theme(legend.direction = "horizontal", 
        legend.position = c(0.2, 0.9), axis.title = element_text(size = 14),
  plot.title = element_text(size = 15)) +labs(x = "PC1 (68% variance explained)", # 解析度来源 pca$variance
                                              y = "PC2 (11% variance explained)") + 
  # 解决图列没有映射颜色的问题
  guides(fill = guide_legend(override.aes=list(shape=21)))

-————————————————————————————————————————————————————————————————

火山图

导入数据和处理

library(readxl)
library(tidyverse)

#### Execl的导入 (右上窗口 - Import Dataset, 注意数值类型)
#### 生成代码
de_result_volcano <- read_excel("/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/vocano/de_result.xlsx", 
    col_types = c("numeric", "text", "text", 
        "text", "text", "numeric", "numeric", 
        "numeric", "numeric", "text", "text", 
        "text", "text", "text", "text", "text", 
        "text", "text"), na = c("NA"))

#### 手动输入选取需要在火山图上展示的基因
selected_genes <- c('FMP27', 'ERG251', 'C5_04050W',
                    'C7_02530C', 'NOT5', 'C6_03800C',
                    'PMC1', 'FEN1', 'ERG3', 'FEN12',
                    'ERG25', 'ERG6', 'MVB12', 'FGR32',
                    'ERG28', 'ERG27')

res <- 
  select(de_result_volcano, Rank, GENE_NAME, log2FoldChange, pvalue) %>% 
  mutate(direction = if_else(pvalue > 0.05 | abs(log2FoldChange) < 1, # 增加 direction 列, if_else 判断
                             'NS', # 正确返回
                             if_else(log2FoldChange >= 1, # 否则返回进一步判断
                                     'UP', 
                                     'DOWN'))) %>%
  mutate(selected = if_else(GENE_NAME %in% selected_genes, 
                            'Show', '-'))


#### 根据 selected_genes 中字符从 res 中 GENE_NAME 提取行
# res_selected <- filter(res, GENE_NAME %in% selected_genes)

作图

library(ggrepel)

ggplot(res, aes(x = log2FoldChange, 
                y = -log10(pvalue))) + # pvalue以log10转换并取相反数
  geom_point(aes(color = direction), 
             size = 3, 
             show.legend = F) + # 去掉图列 (属于点图的图层)
  geom_point(data = filter(res, selected == 'Show'), # 重新指定 data, 筛选后的向量
            # data = res_selected, 
             size = 3, 
             shape = 21, # 带黑框的形状
             stroke = 1) + # 边框的粗细
  geom_text_repel(data = filter(res, selected == 'Show'), 
                # data = res_selected, 
                  size = 3, 
                  box.padding = 0.5, # 标签距离
                  aes(label = GENE_NAME)) +
  geom_hline(yintercept = -log10(0.05), # 添加另一个集合元素 (直线), 坐标为 -log10(0.05)
             linetype = 'dotdash', # 直线的样式 (虚线)
    # solid, dashed, dotted, dotdash, longdash, twodash             
             color = 'grey50') + 
  geom_vline(xintercept = c(-1,1), 
             linetype = 'dotdash', 
             color = 'grey50') + 
  scale_colour_manual(values = c('#1500FF', '#A9A9A9', '#FF0102')) + 
  ylim(0, 50) + # 设置y轴的范围
  labs(x = 'Log2(fold change)', 
       y = '-log10(p-value)') + 
  theme_half_open()

-————————————————————————————————————————————————————————————————

散点图

p_scatter_plot <-
  ggplot(data = small_diamonds, aes(x = carat, y = price)) + 
  geom_point(shape = 21, 
             size = 4, 
             stroke = 0.5, 
             aes(fill = cut)) + 
  scale_fill_npg() + 
  theme_classic()

添加拟合曲线

p_scatter_plot + geom_smooth(method = 'lm') 

# 选择拟合模式 (默认为局部拟合 loess, 这里的 lm 是线性拟合)
# 拟合出来的曲线位于对角线 (表示相关)

添加相关系数的备注信息

cor.test(small_diamonds$carat, small_diamonds$price, method = 'pearson') # 计算相关系数和 p-value
## 
##  Pearson's product-moment correlation
## 
## data:  small_diamonds$carat and small_diamonds$price
## t = 47.95, df = 498, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8896639 0.9210861
## sample estimates:
##       cor 
## 0.9066241
p_scatter_plot + geom_smooth(method = 'lm') + 
  # ggplot Theme Assistant
  theme(legend.position = c(0.1, 0.82)) + # 图例位置
  # 添加文本备注信息 (给予坐标位置)
  annotate('text', x= 2.2, y = 10, label = 'r = 0.917; pvalue = 2.2e-16')

添加边际图

library(ggExtra)

### 类似 ggplot Theme Assistant 用法
### 自动保存到向量 p1 中
p1 <- p_scatter_plot + geom_smooth(method = 'lm') + 
  # ggplot Theme Assistant
  theme(legend.position = c(0.1, 0.82)) + # 图例位置
  # 添加文本备注信息 (给予坐标位置)
  annotate('text', x= 2.2, y = 10, label = 'r = 0.917; pvalue = 2.2e-16')

### 生成代码
ggExtra::ggMarginal(
  p = p1,
  type = 'histogram',
  margins = 'both',
  size = 5,
  colour = 'black',
  fill = 'gray82'
)

# 扩展了解 ggstatsplot (https://github.com/IndrajeetPatil/ggstatsplot)

添加地毯线

p1 + geom_rug(aes(color = cut), # 添加地毯线
              length = unit(3, 'mm')) + # 设置绝对长度
scale_color_npg() # 更换地毯线的标度颜色, 与散点的颜色保持一致

-————————————————————————————————————————————————————————————————

继承与多态的关系

### 拟合曲线需要在多个点之间计算曲线 (群体几何对象)
### 散点只需要 x,y 轴值 (个体几何对象)
### 根据所有的点来拟合曲线?根据分组信息拟合曲线?(因此需要指定 group)
### group = 1 (不分组, 按所有点进行拟合), group = cut (按照 cut 组进行拟合)

ggplot(data = small_diamonds, aes(x = carat, y = price)) + 
  geom_point(shape = 21, 
             size = 4, 
             stroke = 0.5, 
             aes(fill = cut)) + # fill 的映射仅在 point 图层生效
  geom_smooth() + 
  scale_fill_npg() + # 不指定 group (默认不分组,以所有点进行拟合)
  theme_classic()

### 继承的理解
ggplot(data = small_diamonds, aes(x = carat, y = price, fill = cut)) + 
  geom_point(shape = 21, 
             size = 4, 
             stroke = 0.5) + # 虽然没有指定映射, 但继承背景图层关系
  geom_smooth() + 
  # 同理继承 并 按照已有的离散型映射分组 (这里背景中只有 cut 为离散型, carat 和 price 都为连续)
  scale_fill_npg() + 
  theme_classic()

### 多态的理解
ggplot(data = small_diamonds, aes(x = carat, y = price, fill = cut)) + 
  geom_point(shape = 21, 
             size = 4, 
             stroke = 0.5) + 
  geom_smooth(aes(color = cut)) + # 在继承的基础上自定义 (把曲线的颜色映射给cut)
  scale_fill_npg() + 
  theme_classic()

# exp:
HIV <- 
  tibble(Triplet = str_c('Triplet', 1:7, sep = ' '),
              From = c(rep('Zambia', 4), rep('South Africa', 3)), 
              `Group A` = c('28/1687 (1.64)', 
                            '33/2086 (1.57)', 
                            '23/1695 (1.36)', 
                            '41/2013 (2.04)', 
                            '36/1507 (2.35)', 
                            '26/1808 (1.43)', 
                            '13/2195 (0.57)'),
              `Group B` = c('19/1979 (0.94)', 
                            '29/2408 (1.20)', 
                            '22/1687 (1.30)', 
                            '19/1698 (1.13)', 
                            '33/1811 (1.80)', 
                            '26/2078 (1.24)', 
                            '10/2488 (0.40)'),
              `Group C` = c('24/2054 (1.17)', 
                            '33/2262 (1.48)', 
                            '29/1811 (1.63)', 
                            '37/1561 (2.39)', 
                            '28/1304 (2.15)', 
                            '32/1375 (2.31)', 
                            '14/2195 (0.59)'))

HIV_tidy <- 
  gather(HIV, key = Group, value = Value, 3:5) %>% 
# 对 HIV 的 3到5 进行转置处理
# 添加 Group, Value 表头
  separate(col = Value, # 对 Value 列进行处理
           sep = ' ',  # 设置分隔符类型, 这里以空格分割
           into = c('Number', 'Ratio')) %>% # 分割成两列的表头
  separate(col = Number, 
           sep = '/', 
           into = c('Num', 'Total')) %>% # 按/分割
  mutate(Ratio = as.numeric( # 设置数值类型
    str_remove_all( # 去掉字符
      Ratio, '\\(|\\)')), # 对 Ratio 列进行处理, ()特殊, 需要\\转译
         Num = as.numeric(Num),
         Total = as.numeric(Total))

# 标准
  filter(HIV_tidy, Group %in% c('Group A', 'Group C')) %>% 
  ggplot(aes(x = Group, y = Ratio)) + 
  geom_line(aes(
    color = Triplet, # 映射写在各几何图层
    group = Triplet)) + 
  geom_point(aes(
    color = Triplet, # 映射写在各几何图层
    size = Num)) +  
  scale_color_jco() + 
  scale_size(range = c(1, 4)) + 
  theme_classic()

# 背景图层映射的继承
  filter(HIV_tidy, Group %in% c('Group A', 'Group C')) %>% 
  ggplot(aes(x = Group, y = Ratio, 
             color = Triplet # 映射写在背景图层继承
             )) + 
  geom_line(aes(group = Triplet)) + 
  geom_point(aes(size = Num)) +  
  scale_color_jco() + 
  scale_size(range = c(1, 4)) + 
  theme_classic()

-————————————————————————————————————————————————————————————————

折线图

library(gapminder)
library(tidyverse)
library(cowplot)
data("gapminder")

普通折线图

filter(gapminder, country %in% c('China', 'India', 'Japan')) %>% # 对 gapminder 数据进行过滤并管道给 ggplot 作图
  ggplot(aes(x = year, y = lifeExp, color = country)) + # 按 country 进行分组并映射给颜色
  geom_line() + # 群体几何对象, 继承上一层的分组信息
  geom_point(shape = 21, size = 2, fill =  'white') + # 空心圆, 图层的顺序
  scale_color_aaas() + 
  theme_minimal_hgrid() + 
  # ggplot Theme Assistant
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) # x 轴标签旋转 45度, 并下移

配图折线图

### 写入数据
HIV <- 
  tibble(Triplet = str_c('Triplet', 1:7, sep = ' '),
              From = c(rep('Zambia', 4), rep('South Africa', 3)), 
              `Group A` = c('28/1687 (1.64)', 
                            '33/2086 (1.57)', 
                            '23/1695 (1.36)', 
                            '41/2013 (2.04)', 
                            '36/1507 (2.35)', 
                            '26/1808 (1.43)', 
                            '13/2195 (0.57)'),
              `Group B` = c('19/1979 (0.94)', 
                            '29/2408 (1.20)', 
                            '22/1687 (1.30)', 
                            '19/1698 (1.13)', 
                            '33/1811 (1.80)', 
                            '26/2078 (1.24)', 
                            '10/2488 (0.40)'),
              `Group C` = c('24/2054 (1.17)', 
                            '33/2262 (1.48)', 
                            '29/1811 (1.63)', 
                            '37/1561 (2.39)', 
                            '28/1304 (2.15)', 
                            '32/1375 (2.31)', 
                            '14/2195 (0.59)'))

长宽数据的理解

### 宽数据 (不利于数据分析)
姓名 语文 数学 英语
张三  70   80   90
李四  60   70   80

### 长数据
姓名 科目 成绩
张三 语文  70
张三 数学  80
张三 英语  90
李四 语文  60
李四 数学  70
李四 英语  80

宽数据转化为长数据

library(tidyverse)

HIV_tidy <- 
  gather(HIV, key = Group, value = Value, 3:5) %>% 
# 对 HIV 的 3到5 进行转置处理
# 添加 Group, Value 表头
  separate(col = Value, # 对 Value 列进行处理
           sep = ' ',  # 设置分隔符类型, 这里以空格分割
           into = c('Number', 'Ratio')) %>% # 分割成两列的表头
  separate(col = Number, 
           sep = '/', 
           into = c('Num', 'Total')) %>% # 按/分割
  mutate(Ratio = as.numeric( # 设置数值类型
    str_remove_all( # 去掉字符
      Ratio, '\\(|\\)')), # 对 Ratio 列进行处理, ()特殊, 需要\\转译
         Num = as.numeric(Num),
         Total = as.numeric(Total))

作图

# (强大的添加标签 R 包: https://ggrepel.slowkow.com/articles/examples.html)

p1 <- 
  filter(HIV_tidy, Group %in% c('Group A', 'Group C')) %>% # 对数据进行过滤, Group 中提取 A, C
  ggplot(aes(x = Group, y = Ratio, color = Triplet)) + # Triplet 映射 给 color 写在背景图层继承
  geom_line(aes(group = Triplet)) + # 按 Triplet 分组配对折线
  geom_point(aes(size = Num)) +  # 仅对点图层设置点的大小映射
  geom_text_repel( # 强大的添加标签 R 包
    data = filter(HIV_tidy, Group == 'Group A'), # 重定义分组信息, 取消背景图层的继承
    nudge_x = -0.1, # 标签在 x 轴上位移
    min.segment.length = Inf, # 去掉标签引线
    aes(label = str_remove(Triplet, 'Triplet'))) + # tidyverse 中去掉分割字符串
  geom_text_repel(
    data = filter(HIV_tidy, Group == 'Group C'), 
    nudge_x = 0.1, 
    min.segment.length = Inf, 
    aes(label = str_remove(Triplet, 'Triplet'))) +
  scale_color_jco() + 
  scale_size(range = c(1, 4), # 设置点大小刻度的范围
             # 统一图列 (设置点大小图列范围)
             breaks = seq(10, 50, 10), # 以 10 - 40 以 10 位间隔分割
             labels = seq(10, 50, 10), # 对应的图例标签
             limits = c(10, 50) # 确保 10 - 40 范围
            ) + 
  scale_y_continuous(expand = c(0, 0), limits = c(0, 2.5)) + # 设置 y 轴从 0 开始, 范围 0 - 2.5
  labs(x = '', # 去掉 x 轴标题 (设置为空)
       y = 'HIV infections\n(per 100 persons)') + # 设置 y 轴标题
  theme_half_open()
p1

p2 <- 
  filter(HIV_tidy, Group %in% c('Group A', 'Group B')) %>% 
  ggplot(aes(x = Group, y = Ratio, color = Triplet)) + 
  geom_line(aes(group = Triplet)) + 
  geom_point(aes(size = Num)) +  
  geom_text_repel( 
    data = filter(HIV_tidy, Group == 'Group A'), 
    nudge_x = -0.1, 
    min.segment.length = Inf, 
    aes(label = str_remove(Triplet, 'Triplet'))) + 
  geom_text_repel(
    data = filter(HIV_tidy, Group == 'Group B'), 
    nudge_x = 0.1, 
    min.segment.length = Inf, 
    aes(label = str_remove(Triplet, 'Triplet'))) +
  scale_color_jco() + 
  scale_size(range = c(1, 4), 
             breaks = seq(10, 50, 10), 
             labels = seq(10, 50, 10), 
             limits = c(10, 50)) + 
  scale_y_continuous(expand = c(0, 0), limits = c(0, 2.5)) + 
  labs(x = '', 
       y = 'HIV infections\n(per 100 persons)') + 
  theme_half_open()
p2

拼图

library(patchwork)
# (强大的拼图 R 包)

p1 + p2 + 
  plot_layout(guides = "collect") & # 收集图列, 整合图列
  theme(legend.position = 'top') # 放置顶部

### 其他用法: 
### (https://patchwork.data-imaginist.com/articles/patchwork.html)

# p1 + p2 # 左右排列

# p1 / p2 # 上下排列

# p1 | (p2 / p3) # 1 2/3 排列

# p1 + p2 + p3 + 
#  plot_annotation(tag_levels = 'I') # 生成序号

# p1 + p2 + p3 + p4

# p1 + p2 + p3 + p4 + plot_layout(nrow = 3, byrow = FALSE) # 设定行列排布

# p1 + 
#  p2 + labs(subtitle = 'This will appear in the last plot') # 添加副标题

# (p1 | (p2 / p3)) + 
#  plot_annotation(title = 'The surprising story about mtcars') # 设置主标题

# etc.

-————————————————————————————————————————————————————————————————

条形图

## 读取数据
data('mtcars') # 有行名的 dataframe (不利于作图)
mtcars_table <- rownames_to_column(mtcars, var = 'car') %>% # 把行名作为变量
  mutate(cyl = factor(cyl)) # cyl 列转换为因子, 避免默认离散型变量的图列色条

mtcars_table$cyl # Levels - 因子
##  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
## Levels: 4 6 8

作图

ggplot(data = mtcars_table, aes(x = car, y = mpg)) + 
  geom_col(aes(fill = cyl), # aes(fill = factor(cyl))
           width = 0.7) + # 条形图的宽度 (0.7最好)
  scale_fill_npg() + 
  theme_minimal() + 
  theme(axis.text.x = element_text( # 对 x 轴标签字体属性进行修改
    angle = 90, # 旋转 90 度
    hjust = 1, # 标签左右位移, 0 - 左对齐, 1 - 右对齐
    vjust = 0.3), # 标签高度位移, 正往上位移, 负往下位移
        legend.position = 'top') # 图例放顶部

### 对数据进行排序
#### Rstudio 对离散型变量是默认是没有顺序, 按首字母进行排序
#### 需要对离散型变量进行排序, 则需要将其转换为 factor 并自定义设置 levels

mtcars_table <- rownames_to_column(mtcars, var = 'car') %>% 
  mutate(cyl = factor(cyl)) %>% 
  arrange(cyl, desc(mpg)) %>% # 默认升序排序, 先对 cyl 进行排序 = 以 cyl 来分组, 再对 mpg 从高到低排序
  mutate(mtcars_table, car = factor(car, levels = car))

z-score

### 描述数据高出平均值多少个标准差
### 处理数据
mtcars_z_score <- mutate(mtcars_table, mpg_z = (mpg - mean(mpg)) / sd(mpg)) %>% 
  # 计算与平均数相差的标准差倍数
  mutate(direction = if_else(mpg_z >= 0, 'higt', 'low')) %>% # 条件判断
  arrange(desc(direction), mpg_z) %>% 
  mutate(car = factor(car, levels = car))

#### 作图
ggplot(mtcars_z_score, aes(x = car, y = mpg_z)) + 
  geom_col(aes(fill = direction), width = 0.7) + 
  theme_classic() + 
  scale_fill_npg() + 
  theme_minimal() + # z-core 图适合 minimal 主题
  theme(axis.text.x = element_text( 
    angle = 90, 
    hjust = 1, 
    vjust = 0.3), 
        legend.position = 'top') + 
  theme(legend.position = "top", legend.direction = 'horizontal') +labs(fill = 'cyl') + 
    labs(x = NULL) + 
  coord_flip() # 翻转坐标系

棒棒糖图

# 点与线段组合
ggplot(data = mtcars_table, aes(x = car, y = mpg)) + 
  geom_point(aes(color = cyl, 
                 ), size = 3) + 
  geom_segment(aes(x = car, # 线段的 x 轴的起始位置
               xend = car,  # 线段的 x 轴的终止位置
               y = 1, # 线段的 y 轴的起始位置
               yend = mpg, # 线段的 y 轴的终止位置
               color = cyl, 
               width = 3)) + 
  scale_color_npg() + 
  theme_classic() + 
  theme(axis.text.x = element_text(angle = 90, 
    hjust = 1, 
    vjust = 0.3), 
        legend.position = 'top')  + 
  theme(legend.direction = "horizontal") + 
  labs(x = NULL, size = NULL)

扩展补充 (截断坐标轴) https://www.jianshu.com/p/de283990ecd1 https://stackoverflow.com/questions/7194688/using-ggplot2-can-i-insert-a-break-in-the-axis

堆叠条形图 (数值)

#### 适用的数据类型
# 大分类 包含 各个小分类的数据
# 各个 小分类 占总分类的对比
# 一般堆叠条形图和百分比堆叠条形图

#### 过滤数据
small_diamonds <- filter(diamonds, cut %in% c('Ideal', 'Premium', 'Good'))

#### 分组统计
small_diamonds_sum <- 
  group_by(small_diamonds, color, cut) %>% # 注意分组的顺序, 主分类放前面
  summarise(count = n())

#### 作图
ggplot(data = small_diamonds_sum, aes(x = color, y = count)) + 
  geom_col(aes(fill = cut), width = 0.7) + 
  scale_fill_lancet() + 
  scale_y_continuous(expand = c(0, 0)) + # y 轴坐标从 0 开始
  theme_classic()

#### 添加标签
# ggplot(data = small_diamonds_sum, aes(x = color, y = count)) + 
#   geom_col(aes(fill = cut), width = 0.7) + 
#   scale_fill_lancet() + 
#   scale_y_continuous(expand = c(0, 0)) + # y 轴坐标从 0 开始
#   # ? geom_text(aes(x = color, y = count, label = count)) + 
#   # (错误) y 轴位置需要计算 (与前一组分进行累加 再减去自身分组 count 的一半) 且 需要倒序后加标签
#   theme_classic()

# 处理数据
small_diamonds_sum <- 
  group_by(small_diamonds, color, cut) %>% # 注意分组的顺序, 主分类放前面
  summarise(count = n()) %>% 
  arrange(color, desc(cut)) %>%
  mutate(cum_count = cumsum(count)) # 因为进行了分组, 所以只在组内累加

# 作图
ggplot(data = small_diamonds_sum, aes(x = color, y = count)) + 
  geom_col(aes(fill = cut), width = 0.7) + 
  scale_fill_lancet() + 
  scale_y_continuous(expand = c(0, 0)) + 
  geom_text(aes(x = color, 
                y = cum_count - 0.5*count, 
                label = count), 
            color = 'white', 
            size = 3.5) + 
  theme_classic()

填充柱状图 (百分比)

small_diamonds_sum <- 
  group_by(small_diamonds, color, cut) %>% # 注意分组的顺序, 主分类放前面
  summarise(count = n()) %>% 
  arrange(color, desc(cut)) %>%
  mutate(cum_count = cumsum(count)) %>% # 因为进行了分组, 所以只在组内累加
  mutate(prop = count / sum(count)) %>% # 因为进行了分组, 所以 sum 为组内总数
  mutate(cum_prop = cumsum(prop))

ggplot(data = small_diamonds_sum, aes(x = color, y = prop)) + 
  geom_col(aes(fill = cut), width = 0.7) + 
  scale_fill_lancet() + 
  scale_y_continuous(expand = c(0, 0)) + 
  geom_text(aes(x = color, 
                y = cum_prop - 0.5*prop, 
                label = scales::percent(prop, accuracy = 1)), 
            # scales 包中将数值转换为百分比并保留小数
            color = 'white', 
            size = 3.5) + 
  theme_classic()

### 并排排列
ggplot(data = small_diamonds_sum, aes(x = color, y = count)) + 
  geom_col(aes(fill = cut), width = 0.7, 
           position = 'dodge') + # dodge - 躲避
  scale_fill_lancet() + 
  scale_y_continuous(expand = c(0, 0)) + 
  theme_classic()

-————————————————————————————————————————————————————————————————

饼图

## 读取数据
browsers <- read.csv(file = 
  '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ggplot/browers.csv'
  # 绝对路径读取
  ) %>%
  arrange(desc(version)) %>%
  mutate(cumsum_share = cumsum(share))

## 准备数据
browsers_sum <- group_by(browsers, browser) %>%
  summarise(browser_share = sum(share)) %>%
  arrange(desc(browser)) %>%
  mutate(cumsum_browser_share = cumsum(browser_share))

作图

library(ggrepel)
ggplot(data = browsers_sum, 
       aes(x = 'Cullwen', y = browser_share)) + 
         # 单个柱子不需要设置 x 的映射
  geom_col(color = 'black', aes(fill = browser)) + 
  geom_text(data = filter(
    browsers_sum, browser_share >= 5), 
    size = 3, 
    aes(y = cumsum_browser_share - 0.5*browser_share, # 标签的位置
    label = str_c(browser, '\n', browser_share, '%'))) + 
                # 字符串拼接, '\n' 换行符
  geom_text_repel(data = filter(
    browsers_sum, browser_share < 5), 
    size = 3, 
    nudge_y = 7, 
    segment.color = 'black', # 引线的颜色
    min.segment.length = 0, # 不管扰动距离多大都加引线
    aes(y = cumsum_browser_share - 0.5*browser_share, 
    label = str_c(browser, '\n', browser_share, '%'))) + 
  scale_fill_lancet() + 
  coord_polar(theta = 'y') + # 换成极坐标系, 并根据 y 折叠
  theme_nothing()

  # theme_void() # 保留图列的主题

甜甜圈图

ggplot(data = browsers_sum, 
       aes(x = 2, #
           y = browser_share)) + 
  geom_col(color = 'black', aes(fill = browser)) + 
  geom_text(data = filter(
    browsers_sum, browser_share >= 5), 
    size = 3, 
    aes(y = cumsum_browser_share - 0.5*browser_share, 
    label = str_c(browser, '\n', browser_share, '%'))) + 
  geom_text_repel(data = filter(
    browsers_sum, browser_share < 5), 
    size = 3, 
    min.segment.length = 0, 
    aes(x = 2.5, #
        y = cumsum_browser_share - 0.5*browser_share, 
    label = str_c(browser, '\n', browser_share, '%'))) + 
  xlim(0.5, 2.7) + #
  scale_fill_lancet() + 
  coord_polar(theta = 'y') + 
  theme_nothing()

  # theme_void() # 保留图列的主题

多维饼图

# a柱状图 + b柱状图 + y轴扭转

                 # a 柱子
ggplot(data = browsers_sum, 
       aes(x = 2, # 设置柱子的位置, 方便标签的设置
           y = browser_share)) + 
  geom_col(color = 'black', width = 0.95, # 两个柱子的间隙
           aes(fill = browser)) + 
                 # 正常标签
  geom_text(data = filter(
    browsers_sum, browser_share >= 5), # 分组设置标签
    size = 3, 
    aes(x = 2, y = cumsum_browser_share - 0.5*browser_share, 
    label = str_c(browser, '\n', browser_share, '%'))) + 
                  # 重叠标签
  geom_text_repel(data = filter(
    browsers_sum, browser_share < 5), 
    size = 3, 
    nudge_y = 7, # 扰动设置, 在非极坐标下较为容易观察设置
    segment.color = 'black', 
    min.segment.length = 0, 
    aes(y = cumsum_browser_share - 0.5*browser_share, 
    label = str_c(browser, '\n', browser_share, '%'))) + 
                  # b 柱子
  geom_col(data = browsers, 
           aes(x = 3, y = share, fill = version), 
           color = 'black', 
           width = 0.95) + 
                  # 正常标签
  geom_text(data = filter(
    browsers, share >= 4), 
    size = 3, 
    aes(x = 3, y = cumsum_share - 0.5*share, 
    label = str_c(version, '\n', share, '%'))) + 
                  # 重叠标签
  geom_text_repel(data = filter(
    browsers, share < 4), 
    size = 3, 
    nudge_x = 0.1, 
    segment.color = 'black', 
    min.segment.length = 0, 
    aes(x = 3.5, y = cumsum_share - 0.5*share, 
    label = str_c(version, '\n', share, '%'))) +
  scale_fill_igv() + 
  coord_polar(theta = 'y') + 
  theme_nothing()

-————————————————————————————————————————————————————————————————

直方图

data('diamonds')
ggplot(data = diamonds, aes(x = price)) + 
  geom_histogram(color = 'black', 
                 bins = 18, # 窗口数 (默认为x轴)
                 # binwidth = 300 - 窗口大小
                 position = 'dodge', # stack / fill
                 aes(fill = cut)) + 
  scale_fill_brewer(palette = 'Set1') + 
  scale_y_continuous(expand = c(0, 0)) + 
  scale_x_continuous(expand = c(0, 0)) +
  # ggplot Theme Assistant
  theme_minimal_hgrid() + theme(legend.position = c(0.83, 0.83))

-————————————————————————————————————————————————————————————————

密度图

## Method01
ggplot(data = diamonds, aes(x = price)) + 
  geom_density(aes(fill = cut), alpha = 0.25) + 
  scale_fill_npg() + 
  # ggplot Theme Assistant
  theme_minimal_hgrid() + theme(legend.position = c(0.83, 0.83))

## Method02
ggplot(data = diamonds, aes(x = price)) + 
  geom_density(aes(color = cut)) + 
  scale_color_brewer(palette = 'Set1') + 
  # ggplot Theme Assistant
  theme_minimal_hgrid() + theme(legend.position = c(0.83, 0.83))

山峦图

library(ggridges) # ggplot 无法画

### 山恋图 - 密度图各组分的向上平移

ggplot(data = diamonds, aes(x = price, y = cut)) + 
  geom_density_ridges(aes(fill = cut), alpha = 0.7) +
  scale_fill_npg() + 
  theme_half_open() + 
  # ggplot Theme Assistant
  theme(legend.position = "top", legend.direction = "horizontal") + # 图列顶部, 横向
  theme(legend.position = c(0.38, 0.91)) + # 图列坐标
  theme(legend.title = element_text(size = 11)) + # 图列标题大小
  theme(legend.text = element_text(size = 8)) # 图列模块大小

-————————————————————————————————————————————————————————————————

箱线图

前沿延伸

## 点图与散点图的区别
## 点图: y 轴 - 连续型变量, x 轴 - 离散型变量
## 散点图: x,y 轴且为连续型变量

ggplot(data = iris, aes(x= Species, y = Sepal.Length)) + 
  geom_point(aes(colour = Species), position = 'jitter') + 
  # 点的值可能重叠, jitter 参数进行扰动
  theme_cowplot()

## errbar

iris_sum <- 
  group_by(iris, Species) %>% # 对数据进行分组
  # 计算平均数和标准差并进行汇总
  summarise(Sepal.Length.mean = mean(Sepal.Length), 
            Sepal.Length.sd = sd(Sepal.Length))

ggplot(data = iris_sum, aes(x = Species, y = Sepal.Length.mean)) + 
  geom_point(aes(color = Species)) + 
  geom_errorbar(aes(ymax = Sepal.Length.mean + Sepal.Length.sd, 
                ymin = Sepal.Length.mean - Sepal.Length.sd, 
                color = Species), 
                width = 0.15) + 
  theme_cowplot()

作图

#### 最上面的线 - 最大值
#### 中的线 - 中位数
#### 最下方 — 最小值
#### 25%, 25%, 25%, 25%
#### 范围以外 - 异常值
library(ggsignif)
library(ggstatsplot)

ggplot(data = iris, aes(x = Species, y = Sepal.Length)) + 
  geom_boxplot(aes(fill = Species), color = 'black') + 
  # 添加 p-value
  # 0.05 - 显著, 0.01 - 很显著, 0.001 - 极其显著
  geom_signif(comparisons = list(c('setosa', 'versicolor'), 
                                 c('setosa', 'virginica'), 
                                 c('versicolor', 'virginica')), 
              y_position = c(7.1, 8.0), 
              map_signif_level = T) + # 已星号来表示 p-value
  geom_point(size = 3, shape = 21, fill = 'white', aes(color = Species), 
             position = position_jitter(width = 0.25), 
             # 进阶的 jitter, 限制扰动区域
             alpha = 0.5) + 
  scale_fill_npg() + 
  theme_cowplot()

箱线图的延伸

#### 比较组内差异
#### 分面

head(iris) # 宽数据 (列有Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Species - 宽)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# 把宽数据转换为长数据便于作图
iris_tidy <- select(iris, Sepal = Sepal.Length, Petal = Petal.Length, Species) %>% 
  gather(key = Tissue, value = Length, 1:2
         # Sepal, Petal - 直接写列名
         )

#### 作图
ggplot(data = iris_tidy, aes(x = Tissue, y = Length)) + 
  geom_boxplot(aes(fill = Tissue)) + 
  facet_grid(~Species) + # 分面 (按某一列对数据进行分类) (行~列)
  geom_signif(comparisons = list(c("Petal", "Sepal"))) + 
  theme_cowplot()

小提琴图

ggplot(data = iris, aes(x = Species, y = Sepal.Length)) + 
  geom_violin( #
    aes(fill = Species), color = 'black', width = 0.4) + 
  geom_signif(comparisons = list(c('setosa', 'versicolor'), 
                                 c('setosa', 'virginica'), 
                                 c('versicolor', 'virginica')), 
              y_position = c(7.1, 8.0), 
              map_signif_level = T) + 
  geom_point(size = 1.5, shape = 21, fill = 'white', aes(color = Species), 
             position = position_jitter(width = 0.05), 
             alpha = 0.5) + 
  scale_fill_npg() + 
  theme_cowplot()

融合图

ggplot(data = iris, aes(x = Species, y = Sepal.Length)) + 
  # 箱线图 + 小提琴图
  geom_violin(aes(fill = Species), color = 'black', width = 0.4) +
  geom_boxplot(color = 'black', width = 0.05) + 
  geom_signif(comparisons = list(c('setosa', 'versicolor'), 
                                 c('setosa', 'virginica'), 
                                 c('versicolor', 'virginica')), 
              y_position = c(7.1, 8.0), 
              map_signif_level = T) + 
  geom_point(size = 1.5, shape = 21, fill = 'white', aes(color = Species), 
             position = position_jitter(width = 0.05), 
             alpha = 0.5) + 
  scale_fill_npg() + 
  theme_cowplot() + 
  theme(legend.position = c(0.58, 0.07), legend.direction = "horizontal") + 
  theme(legend.text = element_text(size = 8.5),
  legend.title = element_text(size = 8.5))

蜂窝图

library(ggbeeswarm)

ggplot(data = iris, aes(x = Species, y = Sepal.Length)) + 
  geom_quasirandom( #
    shape = 21, color = 'black', aes(fill = Species), method = 'smiley', width = 0.4) + 
  # method: quasirandom, pseudorandom, smiley, fronwney
  geom_signif(comparisons = list(c('setosa', 'versicolor'), 
                                 c('setosa', 'virginica'), 
                                 c('versicolor', 'virginica')), 
              y_position = c(7.1, 8.0), 
              map_signif_level = T) + 
  scale_fill_npg() + 
  theme_cowplot()

云雨图

## 小提琴图的另一种形式
## 前言
library(gghalves)

ggplot(data = iris, aes(x = Species, y = Sepal.Length)) + 
  geom_half_violin(
    # gghalves 包: 一半小提琴图
    aes(fill = Species), color = 'black', width = 0.4) +
  geom_boxplot(color = 'black', width = 0.05) + 
  geom_signif(comparisons = list(c('setosa', 'versicolor'), 
                                 c('setosa', 'virginica'), 
                                 c('versicolor', 'virginica')), 
              y_position = c(7.1, 8.0), 
              map_signif_level = T) + 
  geom_point(size = 1.5, shape = 21, fill = 'white', aes(color = Species), 
             position = position_jitter(width = 0.05), 
             alpha = 0.5) + 
  scale_fill_npg() + 
  theme_cowplot() + 
  theme(legend.position = c(0.58, 0.07), legend.direction = "horizontal") + 
  theme(legend.text = element_text(size = 8.5),
  legend.title = element_text(size = 8.5))

作图

ggplot(data = iris, aes(x = Species, y = Sepal.Length)) + 
  geom_half_violin(
    aes(fill = Species), color = 'black', width = 1, alpha = 0.5, 
    position = position_nudge(x = 0.3, y = 0), # 小提琴图的扰动
    side = 'r', # 小提琴的方向
    adjust = 0.5) + # 平滑程度
  geom_boxplot(color = 'black', width = 0.05, aes(fill = Species), 
               position = position_nudge(x = 0.3, y = 0)) + # 箱线图的扰动
  geom_signif(comparisons = list(c('setosa', 'versicolor'), 
                                 c('setosa', 'virginica'), 
                                 c('versicolor', 'virginica')), 
              y_position = c(7.1, 8.0), 
              map_signif_level = T) + 
  geom_point(size = 0.5, #
             position = position_jitter(width = 0.15)) + #
  coord_flip() + # 旋转坐标系
  scale_fill_npg() + 
  theme_cowplot() + 
  theme(legend.position = c(0.58, 0.07), legend.direction = "horizontal") + 
  theme(legend.text = element_text(size = 8.5),
  legend.title = element_text(size = 8.5))

云云图

ggplot(data = iris, aes(x = Species, y = Sepal.Length)) + 
  geom_half_violin(fill = 'black', color = 'black', width = 1, alpha = 0.5, 
    position = position_nudge(x = 0.3, y = 0), 
    side = 'r', #
    adjust = 0.5) + 
geom_half_violin(
  aes(x = Species, y = Petal.Length, # 
      ), fill = 'white', color = 'black', width = 1, alpha = 0.5, 
    position = position_nudge(x = 0.3, y = 0), 
    side = 'l', #
    adjust = 0.5) + 
  coord_flip() + 
  theme_cowplot()

-————————————————————————————————————————————————————————————————

布局

分面

### facet_grid (简单分面)
mtcars_table <- rownames_to_column(mtcars, var = 'car') %>% 
  mutate(cyl = factor(cyl), 
         vs = if_else(vs == 1, 'V', 'L'), 
         am = if_else(am == 1, 'A', 'M'))

ggplot(mtcars_table, aes(x = wt, y = mpg)) + 
  geom_point(shape = 21, 
             alpha = 0.5, 
             aes(size = disp, fill = factor(cyl))) + 
  scale_fill_npg() + 
  scale_size(range = c(1, 20)) + 
  facet_grid(factor(vs)~factor(am), # 两个变量, vs(行)~am(列)
             # .~factor(am), 单变量 (am)
             scales = 'free') + # free, free_x, free_y
  theme_bw()

### facet_wrap (设置行列分面)
small_diamonds <- sample_n(diamonds, size = 500)
ggplot(data = small_diamonds, aes(x = carat, y = price)) + 
  geom_point(shape = 21, size = 2, 
             color = 'black', aes(fill = cut)) + 
  scale_fill_npg() + 
  facet_wrap(~color, nrow = 2) + # 单个变量 (color), 设置行列数
  theme_bw() + 
  theme(legend.position = c(0.89, 0.245))

### acet_matrix (多变量分面)
library(ggforce)

ggplot(mtcars_table, aes(x = .panel_x, y = .panel_y)) + # 非固定x, y轴映射
  geom_point(shape = 21, 
             aes(fill = factor(cyl))) + 
  scale_fill_npg() + 
  facet_matrix(vars(mpg, disp, wt, qsec)) + # 四个变量
  # facet_matrix(rows = vars(disp, wt), cols = var(mpg, qsec)) - 指定行列变量
  theme_bw()

局部放大

### 根据 映射/类别 放大
library(ggforce)

ggplot(iris, aes(x = Petal.Length, 
                 y = Petal.Width, color = Species)) + 
  geom_point(size = 3) + 
  scale_color_lancet() + 
  theme_test() + 
  facet_zoom(x = Species == 'versicolor', # x = (投影到 x 轴)
             zoom.size = 1) # 与原图的比例

### 根据 坐标数值范围 放大
ggplot(iris, aes(x = Petal.Length, 
                 y = Petal.Width, color = Species)) + 
  geom_point(size = 3) + 
  scale_color_lancet() + 
  theme_test() + 
  facet_zoom(xlim = c(3.5, 5.3), # 设置放大的区域 x 轴范围
             ylim = c(1.3,2), # 设置放大的区域 y 轴范围
             zoom.size = 1) 

### 放大部分添加标签
### 散点在 主, 放大图层都显示
### 标签仅在放大图层显示
### 对数据进行 Filter, 实现就一部分数据加标签
### facet_zoom: zoom = NA (主图和放大部分都显示)
###             zoom = TRUE (仅在放大部分显示) (标签图层的数据输入)
### 将放大部分的点另存为一个 table, zoom 赋值为 TRUE

iris_tbl_all <- mutate(iris, zoom = NA)
iris_tbl_zoom <- filter(iris, 
                        Petal.Length >= 3.5 & Petal.Length <= 5.3 & 
                          Petal.Width >= 1.3 & Petal.Width <= 2) %>% 
  mutate(zoom = TRUE)

ggplot(iris_tbl_all, aes(x = Petal.Length, 
                 y = Petal.Width, color = Species)) + 
  geom_point(size = 3) + 
  geom_text_repel(data = iris_tbl_zoom, aes(label = Species), nudge_x = 0.06, nudge_y = 0.005,box.padding = 0.2, size = 3) + 
  scale_color_lancet() + 
  theme_test() + 
  facet_zoom(xlim = c(3.5, 5.3), # 设置放大的区域 x 轴范围
             ylim = c(1.3,2), # 设置放大的区域 y 轴范围
             zoom.size = 1, 
             zoom.data = zoom) # NA, T, F 的信息处在列

### 离散型比变量的局部放大

### 准备数据
data("diamonds")
set.seed(100)
small_diamonds_zoom <- sample_n(diamonds, size = 500)

### facet_zoom 不适用于离散型变量
ggplot(data = small_diamonds_zoom, aes(x = color,y = price)) + 
  geom_point(shape = 21, size = 4, aes(fill = cut)) + 
  scale_fill_npg() + 
  theme_classic() + 
  facet_zoom(xlim = c('F', 'G', 'H'))
## Error in `train_scales()`:
## ! facet_zoom doesn't support zooming in discrete scales
# Error: facet_zoom doesn't support zooming in discrete scales (不适用于离散型变量)

### 优化处理数据
small_diamonds_zoom <- sample_n(diamonds, size = 500) %>% 
  mutate(color_num = as.numeric(color)) # 把离散型变量数字化 (首字母的大小)

### 作图
ggplot(data = small_diamonds_zoom, aes(x = color_num,y = price)) + 
  geom_point(shape = 21, size = 4, aes(fill = cut)) + 
  scale_fill_npg() + 
  theme_bw() + 
  scale_x_continuous(name = "color", # 修改 x 轴的标题
                     breaks = 1:7, # 设置 x 轴刻度间隔
                     labels = c('D', 'E', 'F', 'G', 'H', 'I', 'J')) + # 设置刻度对应的标签
  facet_zoom(xlim = c(2:4))

添加标记

### 圈出特定的点
library(ggforce)

ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Species)) + 
  geom_point(size = 3) + 
  geom_mark_hull( # hull - 多边形, ellipse - 椭圆标记
    aes(label = Species) # 添加标签
    ) + 
  scale_color_lancet() + 
  theme_test() + 
  theme(legend.position = 'none')

添加子图表

### 图中表

#### 生成统计表
iris_stat <- group_by(iris, Species) %>%
  summarise(Petal.Length = mean(Petal.Length), 
            Petal.Width = mean(Petal.Width))

view(iris_stat)

#### 生成位置表
tbl_pos <- tibble(x = 9, y = 0.1, tb = list(iris_stat)) # tb 里存着的就是子表
# 子表的 x, y坐标, tb里放着子表 (统计表 - list - 可以是多个)

#### 作图
library(ggpmisc)

ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Species)) + 
  geom_point(size = 3) + 
  geom_table( # 
    data = tbl_pos, # 切换数据 (位置表)
             aes(x = x, y = y, # 主图层的 x, y 轴
                 label = tb)) + # 统计表
  scale_color_lancet() + 
  theme_test() + 
  theme(legend.position = c(0.1, 0.8))

### 图中图

#### 生成子图
p0 <- ggplot(iris, aes(Species, Sepal.Length)) +
  geom_boxplot(aes(fill = Species), outlier.shape = 21) +
  scale_fill_lancet() +
  theme_classic() +
  theme(legend.position = 'none', 
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.background = element_blank())

#### 生成位置表
plot_pos <- tibble(x = 9, y = 0.1, plot = list(p0)) # plot 里存的就是子图
view(plot_pos)

#### 作图
library(ggpmisc)
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Species)) +
  geom_point(size = 3) +
  geom_plot( #
    data = plot_pos, 
    aes(x = x, y = y, 
        label = plot)) +
  scale_color_lancet() +
  theme_test() +
  theme(legend.position = c(0.1, 0.8))

-————————————————————————————————————————————————————————————————

拼图

library(patchwork) # 最常用的拼图包

### 准备图片
p1 <- ggplot(mtcars) + 
  geom_point(aes(mpg, disp, color = factor(cyl))) + 
  ggtitle('Plot 1') 

p2 <- ggplot(mtcars) + 
  geom_boxplot(aes(gear, disp, group = gear, fill = factor(gear))) + 
  ggtitle('Plot 2')

p3 <- ggplot(mtcars) + 
  geom_point(aes(hp, wt, color = factor(cyl))) + 
  ggtitle('Plot 3')

p4 <- ggplot(mtcars) + 
  geom_bar(aes(gear)) + 
  facet_wrap(~cyl) + 
  ggtitle('Plot 4')
### 默认拼图
p1 + p2 + p3

p1 + p2 + p3 + p4 # 简单拼图

### '|' 左右拼图, '/' 上下拼图
(p1 | p2) / p3 / p4

### 自定义拼图
p1 / ((p2/p3) | p4)

### 调整比例
p1 / ((p2/p3) | p4) + plot_layout(heights = c(1,3), widths = c(1,6))

### 添加图序号
p1 / ((p2/p3) | p4) + 
  plot_annotation(tag_levels = 'A') # 'a', 'I', '1'

### 同时设置图的主题元素

p1 / ((p2/p3) | p4) + 
  plot_layout(heights = c(1,3), widths = c(1,6)) + 
  plot_annotation(tag_levels = 'A') & # '&' 同时设置
  theme_bw() & # 主题风格
  theme(axis.title = element_text(size = 20)) # 主题字体大小

### 图列收集
#### 默认 (自动识别相同的图列并收集)

p6 <- p1 / ((p2/p3) | p4) + 
  plot_layout(heights = c(1,3), widths = c(1,6)) + 
  plot_annotation(tag_levels = 'A') + # 'a', 'I', '1'
  plot_layout(guides = 'collect') & 
  theme_bw() # 主题风格 (& 同时设置)
p6

#### 设置一个空区域放置图列

p5 = p3
p1 + p3 + p5 + 
  plot_layout(guides = 'collect') + # 收集图列
    guide_area() & # 设置一个空区域放置收集好的图列
  theme_bw()

-————————————————————————————————————————————————————————————————

韦恩图

## 反应交集
## 在线工具: http://www.interactivenn.net/
library(ggvenn) # ggplot 的语法 (https://github.com/yanlinlin82/ggvenn)

## 读取数据 (需要向量 / 储存向量的 list)
Orthogroups <- read_delim("/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/Orthogroups.GeneCount.tsv", 
    "\t", escape_double = FALSE, trim_ws = TRUE)

## 处理数据
sesame <- filter(Orthogroups, Sind > 0) %>% pull(Orthogroup)
tomato <-filter(Orthogroups, Slyc > 0) %>% pull(Orthogroup)
potato <- filter(Orthogroups, Stub > 0) %>% pull(Orthogroup)
grape <- filter(Orthogroups, Vvin > 0) %>% pull(Orthogroup)

venn <- list(sesame = sesame, tomato = tomato, potato = potato, grape = grape)

## data type: list, data.frame
ggvenn(venn, c("sesame", "tomato", "potato", "grape"), 
       stroke_linetype = 1, # 线条的类型
       stroke_size = 1, # 线条粗细
  set_name_color = "black", 
  set_name_size = 5, 
# digits = 2, # 保留百分比后几位
  show_percentage = FALSE) + # 展示百分比 (默认为 TRUE)
  scale_fill_npg()

提取 ID

library(VennDetail) # https://github.com/guokai8/VennDetail

res_venndetail <- venndetail(venn) # list 列表
result_venndetail <- result(res_venndetail)
# view(head(result_venndetail)) # 列出重叠 / 不相交的值 / ID
# filter(result_venndetail, Subset == 'sesame_tomato_potato') # 查看 sesame_tomato_potato 相交ID的个数

usetplot

### 韦恩图最大长度为 6 个向量
### usetplot - 韦恩图另一种形式

library(UpSetR)
library(ggupset)
library(ComplexUpset)

### 处理数据
Orthogroup_usetplot <- select(Orthogroups, -Total)
Orthogroup_usetplot[2:5] <- Orthogroup_usetplot[2:5] > 0

### 作图
upset(Orthogroup_usetplot, 
#     c(Slyc, Stub, Sind, Vvin), # 手动输入
      colnames(Orthogroup_usetplot[2:5]), 
      height_ratio = 0.4, 
      width_ratio = 0.2, 
      min_size = 0, 
      name = NULL) # x 轴范围

-————————————————————————————————————————————————————————————————

相关系数图 (少量的数据)

# https://blog.csdn.net/woodcorpse/article/details/106553931
# 与树状图类似 (适合数据量多) 

# devtools::install_github("hannet91/ggcor")
library(ggcor)

# mechanical 与 performance 相关
mtcar_mechanical <- mtcars[, c('mpg', 'qsec')]
mtcar_performance <- mtcars[, c('cyl', 'disp', 'hp', 'drat', 'wt', 'vs', 'am')]

# 相关系数矩阵图
# 表内任意两列间的相关系数

quickcor(mtcar_performance, 
         type = 'upper', # upper, lower, full
         cor.test = T) + # 计算 P 值
  geom_square() + # geom_square, geom_circle2, geom_color, geom_ellipse2
  geom_mark(size = 2.5) + # 字体大小
  scale_fill_gradient2(low = '#2D4971', 
                            high = '#9F192B', 
                            mid = 'white', 
                            midpoint = 0) # 连续型数据图列映射

quickcor(mtcar_performance, 
         cor.test = T) + # 设置背景主题默认 fill
  geom_square(data = get_data( # get_data 获取背景图层的数据映射
    type = 'upper', # 把相关系数矩阵 (右上角)
    show.diag = F)) + # 忽略对角线
  geom_mark(data = get_data(
    type = 'lower', # P 值 (左下角)
    show.diag = F), 
    size = 2.5) + 
  geom_abline(slope = -1, # 斜率
              intercept = 8, # 截距
              linetype = 'dotted', 
              color='grey', 
              size = 1) + # 添加对角分割线
        # solid, dashed, dotted, dotdash, longdash, twodash
  scale_fill_gradient2(low = '#2D4971', 
                            high = '#9F192B', 
                            mid = 'white', midpoint = 0) + 
  theme_cowplot()

# 两表间的相关系数图
link_cor <- correlate(mtcar_mechanical, mtcar_performance, cor.test = T) %>% # 计算两表的相关性
  as_cor_tbl() %>% # 把结果转换为 table 格式
  select(mechanical = .row.names, performance = .col.names, r, p.value) %>% # 修改列名
  mutate( # 增加列
    rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf), # 将 r 列的数据切为为多个窗口
           # (-∞, 0.2), (0.2, 0.4), (0.4, +∞), 线条粗细的映射
                  labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")), # 并给予标签, 图列
    pd = cut(p.value, breaks = c(-Inf, 0.01, 0.05, Inf),
                  labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))

view(link_cor)

# 作图
# part01
quickcor(mtcar_performance, 
         type = 'upper', 
         cor.test = T) + 
  geom_square() + 
  geom_mark(size = 2.5) + 
  scale_fill_gradient2(low = '#2D4971', 
                            high = '#9F192B', 
                            mid = 'white', midpoint = 0) + 
# part02
  ggcor::anno_link(data = link_cor, aes(color = pd, size = rd)) + 
  scale_size_manual(values = c(0.5, 1, 2)) + 
  scale_colour_manual(values = c("#D95F02", "#1B9E77", "#A2A2A288"))
## Error: 'anno_link' is not an exported object from 'namespace:ggcor'
# WGCNA 应用
# 加载数据
load("/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ggcor/wgcna.rdata")
view(net) # WGCNA 生成

# 处理数据
WGCNA_MEs <- net$MEs
view(head(WGCNA_MEs))
colnames(WGCNA_MEs) <- # 读取列名
  str_remove(colnames(WGCNA_MEs), 'ME') # 赋予修改 (移除 MEs 字符)
view(head(WGCNA_MEs))

# 准备数据
link_cor_WGCNA <- 
  correlate(datTraits[,1:3], # 选取 WGCNA结果 (datTraits) 的 1-3 列数据
            WGCNA_MEs, 
            cor.test = T, 
            use = "p") %>% # 跳过空值
  as_cor_tbl() %>% 
  select(Traits = .row.names, 
         Modules = .col.names, 
         r, p.value) %>%
  mutate(rd = cut(r, breaks = c(-Inf, 0.2, 0.4, Inf),
                  labels = c("< 0.2", "0.2 - 0.4", ">= 0.4")), 
         pd = cut(p.value, breaks = c(-Inf, 0.01, 0.05, Inf), 
                  labels = c("< 0.01", "0.01 - 0.05", ">= 0.05")))

# 作图
# part01
quickcor(WGCNA_MEs, 
         type = 'upper', 
         cor.test = T, 
         show.diag = F) + 
  geom_square() + 
  scale_fill_gradient2(low = '#2D4971', 
                            high = '#9F192B', 
                            mid = 'white', midpoint = 0) + 
# part02
  ggcor::anno_link(data = link_cor_WGCNA, aes(color = pd, size = rd)) + 
  scale_size_manual(values = c(0.5, 1, 1.5)) + 
  scale_colour_manual(values = c("#D95F02", "#1B9E77", "#A2A2A288")) +
                    # + AI 调整图列位置
  add_link(link_cor_WGCNA, mapping = aes(colour = p.value, size = r),
           diag.label = TRUE) +
  scale_size_manual(values = c(0.5, 1.5, 3)) +
  geom_diag_label() + remove_axis("x")
## Error: 'anno_link' is not an exported object from 'namespace:ggcor'

-————————————————————————————————————————————————————————————————

热图

## 相当于矩阵中的数值用颜色的深浅表示
## cluster 聚类 (表达模式相似 - 随着细胞周期变化 A基因上调, B基因下调...)

pheatmap

library(pheatmap)

### 导入数据
load(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/geo-cesc/prepare.rdata')
load(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/geo-cesc/de.rdata')

sample_info_pheatmap <- sample_info
de_result_pheatmap <- de_result


### 处理数据
#### 提取 cancer 和 normal 样本编号
cancer_normal_samples <- 
  rownames_to_column(sample_info, var = 'sample_id') %>% 
  filter(group == 'Cancer' | group == 'Normal')  %>% # '==' 判断 
  pull(sample_id)

#### 提取 cancer 和 normal 样本信息表
cancer_normal_samples_info <- 
  sample_info[cancer_normal_samples, ] # cancer_normal_samples 存有目标样品的编号
# 匹配 cancer_normal_samples 里编号, 提取对应行列

#### 提取前 20 个差异最大基因的表达矩阵  
top20_de_pheatmap <- select(de_result_pheatmap, Gene_Symbol, one_of(cancer_normal_samples)) %>% 
  filter(!is.na(Gene_Symbol)) %>% 
  distinct(Gene_Symbol, .keep_all = T) %>% 
  dplyr::slice(1:20) %>% 
  column_to_rownames(var = 'Gene_Symbol')

#### 作图
view(top20_de_pheatmap) # 行 (样品), 列 (基因)

pheatmap(top20_de_pheatmap, 
         show_colnames = F, # 样品太多, 去除样品名称
         cellwidth = 6, 
         cellheight = 6, # cell (格子), 长等宽 (正方形)
#        color = colorRampPalette(c("green","white","red"))(100), 
            # 设置格子颜色 (过渡区间-越高越流畅) (推荐默认)

#        display_numbers = T, # 格子中展示数据
         fontsize = 8, # fontsize_row (col) 对行/列 字体大小单独设置
         cutree_cols = 3, # 对列进行切分
         annotation_col = dplyr::select( # 对切分的列注释
           cancer_normal_samples_info, # 输入为 dataframe 
           group), # 提取样品信息表的 group 列
         annotation_colors = list(
            group = c('Cancer' = '#fc8d59', 
                      'Normal' = '#99d594')), # 设置列注释颜色
         )

当表达矩阵中有异常值 (过低/过高)

abnormal_df <- top20_de_pheatmap
abnormal_df[1,1] = 50
view(abnormal_df)

# 默认
pheatmap(abnormal_df, 
         show_colnames = F, 
         cellwidth = 8, 
         cellheight = 8, 
         fontsize = 8, 
         cutree_cols = 3, 
         annotation_col = dplyr::select( 
           cancer_normal_samples_info, 
           group), 
         annotation_colors = list(
            group = c('Cancer' = '#fc8d59', 
                      'Normal' = '#99d594')))

## Method 01 (设置颜色跨度 - 次次之)
pheatmap(abnormal_df, 
         show_colnames = F, 
         cellwidth = 6, 
         cellheight = 6, 
         fontsize = 6, 
         cutree_cols = 2, 
         annotation_col = dplyr::select(
           cancer_normal_samples_info, 
           group), 
         annotation_colors = list(
            group = c('Cancer' = '#fc8d59', 
                      'Normal' = '#48b9c4')), 
         # 通过减少格子颜色映射的跨度
         color = colorRampPalette(c("#99d594","#fdfed8","#a9393d"))(18), 
         breaks = seq(0, 18, 1), # 最高到最低由 18 个颜色区分
         legend_breaks = seq(0, 18, 1), 
         legend_labels = seq(0, 18, 1)
         )

## Method 02 (取对数 - 次之)
pheatmap(log10(abnormal_df + 1), 
            # 取对数
         show_colnames = F,
         cellwidth = 6,
         cellheight = 6,
         fontsize = 6,
         cutree_cols = 2,
         annotation_col = dplyr::select(
           cancer_normal_samples_info, 
           group),
         annotation_colors = list(
            group = c('Cancer' = '#fc8d59', 
                      'Normal' = '#99d594'))
         )

# Method 03 (标准化 - 优先)
pheatmap(abnormal_df,
         scale = 'row', # row 描述基因表达量的差异 (标准化)
         show_colnames = F, 
         cellwidth = 6, 
         cellheight = 6, 
         fontsize = 6, 
         cutree_cols = 2, 
         annotation_col = dplyr::select(
           cancer_normal_samples_info, 
           group), 
         annotation_colors = list(
            group = c('Cancer' = '#fc8d59',  
                      'Normal' = '#99d594'))
         )

Heatmap

library(ComplexHeatmap) 
library(circlize)

# 默认
Heatmap(top20_de_pheatmap, 
#       show_column_dend = F, # 展示列的聚类
        show_column_names = F, # 列名 (样品名)
        # 图例加名字
        name = "Expression", 
        # 加表标题 
        column_title = "Gene Expression Heatmap",
        column_title_side = "top" # 'bottom - 底部' 
        )

# 添加注释
column_ha <- HeatmapAnnotation(
  group = anno_simple(cancer_normal_samples_info$group), # 癌症/正常 (简单-色块)
  test1 = anno_points(cancer_normal_samples_info$test1), # 身高 (点)
  test2 = anno_lines(cancer_normal_samples_info$test2), # 体重 (折线)
  test3 = anno_barplot(cancer_normal_samples_info$test3), # 年龄 (直方图)
  col = list(group = c(Cancer = '#e065af', Normal = '#fee0d2'))) # 离散型变量

Heatmap(top20_de_pheatmap, 
#       show_column_dend = F, # 展示列的聚类
        show_column_names = F, # 列名 (样品名)
        # 图例加名字
        name = "Expression", 
        # 加表标题 
        column_title = "Gene Expression Heatmap",
        column_title_side = "top", # 'bottom - 底部' 
        col = colorRamp2(breaks = c(0, 10, 20), 
                            colors = c("#6cc08b","white","#ef6a4c")), 
               # 通过 circlize 包创建一个 Heatmap 的颜色刻度
#       border = 'black', # 外边框
        rect_gp = gpar(col = '#737373', # 内边框颜色和粗细
                       lwd = 1.5), 
        row_names_gp = gpar(fontsize = 8), 
        column_title_gp = gpar(fontface = 'bold'), # 标题加粗
#       column_km = 3, # 对列聚类进行分割
        column_split = cancer_normal_samples_info$group, # 按照 group 分割 (cancer/normal)
        bottom_annotation = column_ha, # class(column_ha) - funtion (注释信息)
        column_gap = unit(0.015, 'npc') # 设置分割间隙距离 (npc - 相对主图的大小)
        )

案例

# 读取数据
mat_expr <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/mat_expr.csv', 
                     row.names = 1)
mat_meth <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/mat_meth.csv', 
                     row.names = 1)
sample_info_Heatmap <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/sample_info.csv', 
                        row.names = 1)
gene_info_Heatmap <- read.csv(file = '/Volumes/Cullwen_SSD/Genek/5 R语言出版级绘图(2020)/plot_training_20200701/data/ComplexHeatmap/example3/gene_info.csv', 
                      row.names = 1)
# Heatmap01 <- 
# 关于样品的排序, 第一个热图聚类后顺序已决定, 后面的热图依赖首个热图
# No.1 Heatmap
Heatmap(mat_meth,
        name = 'Methylation', # 图列名称
        col = colorRamp2(breaks = c(0, 0.5, 1), 
                         colors = c("blue", "white", "red")), # cell 颜色刻度和映射
        show_row_names = F, 
        cluster_columns = F, 
        column_title = 'Methylation', # 列的标题
        column_title_gp = gpar(fontface = 'bold'), 
        top_annotation = HeatmapAnnotation( # 注释的设置
          type = sample_info_Heatmap$type, 
          col = list(type = c("Tumor" = "pink", 
                              "Control" = "royalblue")), # 离散型变量
          annotation_name_side = 'left'), # 位置放在坐标
        row_km = 2, # 对行聚类分割 (2类)
        row_split = gene_info_Heatmap$meth_direction # 根据 gene_info_Heatmap$meth_direction 分割
        ) + 
# No.2 Heatmap
Heatmap(gene_info_Heatmap$meth_direction, 
        name = 'meth direction', 
        col = c('hypo' = 'blue', 'hyper' = 'red')) + 
# No.3 Heatmap
Heatmap(mat_expr, 
          name = 'Expression',
          col = colorRamp2(breaks = c(-2, 0, 2), 
                           colors = c("green", "white", "red")), 
          show_row_names = F, 
          cluster_columns = F, 
          column_title = 'Expression', 
          column_title_gp = gpar(fontface = 'bold'), 
          top_annotation = HeatmapAnnotation(
          type = sample_info_Heatmap$type, 
          col = list(type = c("Tumor" = "pink", 
                              "Control" = "royalblue")), 
          show_annotation_name = F)) + 
# No.4 Heatmap
Heatmap(gene_info_Heatmap$cor_pvalue, # 相关性的可靠程度
        name = '-log10(cor_p)', 
        col = colorRamp2(breaks = c(0, 2, 4), # 连续型变量
                         colors = c("white", "white", "red"))) + 
# No.5 Heatmap
Heatmap(gene_info_Heatmap$gene_type, 
        name = 'gene_type', 
        col = c('protein_coding' = '#8DD3C7', 
                  'psedo-gene' = '#FEFBB3', 
                  'lincRNA' = '#BEBADA', 
                  'others'= '#F07F70', 
                  'microRNA' = '#80B1D4')) + 
# No.6 Heatmap
Heatmap(gene_info_Heatmap$gene_anno, 
        name = 'gene_anno') + 
# No.7 Heatmap
rowAnnotation(dist_tss = 
                anno_barplot(gene_info_Heatmap$dist)) # 直方图的形式

Heatmap01 <- draw(Heatmap01, heatmap_legend_side = 'bottom') # 将图列放在底部
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'draw': object 'Heatmap01' not found
top100_de_Heatmap <- select(de_result_pheatmap, Gene_Symbol, one_of(cancer_normal_samples)) %>% 
  filter(!is.na(Gene_Symbol)) %>% 
  distinct(Gene_Symbol, .keep_all = T) %>% 
  dplyr::slice(1:100) %>% 
  column_to_rownames(var = 'Gene_Symbol')


# 行注释标注关键基因
## 生成数据
set.seed(123) # 随机种子
key_genes_Heatmap02 <- sample(rownames(top100_de_Heatmap), 
                    size = 20, 
                    replace = F)
## 判断 key_genes_Heatmap02 位于 top100_de_Heatmap 里的位置
which(rownames(top100_de_Heatmap) %in% key_genes_Heatmap02)
##  [1]  7  9 14 25 26 31 36 42 43 50 51 57 67 69 72 79 87 90 95 97
## 构建注释信息 funtion
row_ha <- 
  rowAnnotation(
    key_genes_Heatmap02 = 
      anno_mark(at = which(rownames(top100_de_Heatmap) %in% key_genes_Heatmap02), 
                labels = key_genes_Heatmap02, 
                labels_gp = gpar(fontsize = 8)
                )
    )

Heatmap(top100_de_Heatmap, 
        show_row_names = F, 
        show_column_names = F, 
        name = 'Expressio', 
        column_title = 'Gene Expression', 
        column_title_gp = gpar(fontface = 'bold'), 
        col = colorRamp2(breaks = c(0, 10, 20), 
                         colors = c("green", "white", "red")), 
        rect_gp = gpar(col = 'white', # 添加内框
                       lwd = 1), 
        column_km = 3, 
        column_gap = unit(0.015, 'npc'), 
        right_annotation = row_ha
        )

column_ha <- HeatmapAnnotation(
  group = anno_simple(cancer_normal_samples_info$group), 
  height = unit(15, 'mm'), # 癌症/正常 (简单-色块)
  test1 = anno_points(cancer_normal_samples_info$test1, # 身高 (点)
                      axis = F, # 不显示标签
                      size = unit(1.5, 'mm'), # 设置点的大小
                      height = unit(5, 'mm')), # 设置高度
  test2 = anno_lines(cancer_normal_samples_info$test2, # 体重 (折线)
                     axis_param = list(at = c(0, 50, 100), 
                                       labels = c('', #
                                                  '50', '100')),
     # 0 刻度对应的标签设置为空字符串, 避免与下一个图的 100 刻度标签重叠
     height = unit(5, 'mm')), 
  test3 = anno_barplot(cancer_normal_samples_info$test3, # 年龄 (直方图)
                             height = unit(5, 'mm')), 
  col = list(group = c('Cancer' = '#e065af', 'Normal' = '#fee0d2')), # 离散型变量
  annotation_name_gp = gpar(fontsize = 9, # 修改注释字体大小
                            fontface = 'italic') # 斜体
  )

Heatmap(top100_de_Heatmap, 
        show_row_names = F, 
        show_column_names = F, 
        name = 'Expressio', 
        column_title = 'Gene Expression', 
        column_title_gp = gpar(fontface = 'bold'), 
        col = colorRamp2(breaks = c(0, 10, 20), 
                         colors = c("green", "white", "red")), 
        rect_gp = gpar(col = 'white', # 添加内框
                       lwd = 1), 
        column_km = 3, 
        column_gap = unit(0.015, 'npc'), 
        right_annotation = row_ha, 
        bottom_annotation = column_ha
        )